import fitz
from operator import itemgetter
import os
import time


def pdf_to_image(pdfPath, imagePath, imageExt=".webp"):
    """Save the first embedded image of every page of a PDF to disk.

    Pages are not rendered. For each page, the first entry returned by
    ``page.get_images()`` is looked up and its raw image bytes are written
    unchanged to ``imagePath + <page number> + imageExt``. Page numbers
    start at 1, so the output index always matches the page the image came
    from, even when some pages are skipped.

    Args:
        pdfPath: Path of the PDF file to read.
        imagePath: Output path prefix (for example ``'./img/img_'``). A
            missing parent directory is created.
        imageExt: Suffix appended to every output file name. Defaults to
            ``".webp"`` for backward compatibility; note that the bytes are
            written unconverted, so the file is only really WebP if the
            embedded image was. Pass ``None`` to use the extension reported
            by PyMuPDF for each image instead.
    """
    # Create the target directory up front so the first write cannot fail
    # merely because the folder does not exist yet.
    outDir = os.path.dirname(imagePath)
    if outDir:
        os.makedirs(outDir, exist_ok=True)

    # The context manager closes the document even if extraction fails.
    with fitz.open(pdfPath) as pdfDoc:
        for pg in range(pdfDoc.page_count):
            imgcount = pg + 1
            page = pdfDoc[pg]
            images = page.get_images()

            # Pages without any embedded image used to raise IndexError and
            # abort the whole run; skip them instead.
            if not images:
                print("imgID:    %s  (no image on this page, skipped)" % imgcount)
                continue

            # Each entry is a tuple whose first item is the image's xref.
            xref = images[0][0]
            # Image bytes plus metadata such as the file extension.
            img = pdfDoc.extract_image(xref)
            print("imgID:    %s" % imgcount)
            print("xref:  %s" % xref)

            suffix = "." + img["ext"] if imageExt is None else imageExt
            with open(imagePath + str(imgcount) + suffix, "wb") as f:
                f.write(img["image"])







def main():
    """Run the image extraction on the hard-coded PDF and print the elapsed time."""
    started = time.time()
    pdf_to_image('22政治新考案新高考全国b版教师用书.pdf', './img/img_')

    # Disabled call, kept for reference:
    # pdf_to_TextBlocks('1.pdf', 'F:\image\pdftxt.txt')
    elapsed = time.time() - started
    # The message reads "Time spent: <seconds> s".
    print('花费的时间为：%.2f秒' % elapsed)


# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()